// Drop any dataset currently held in memory before starting.
clear

// All relative paths used below (data\..., estimation\...) resolve against
// this working directory.
cd "D:\data_replication"

// Build the prefix-level zeta files zeta_pc7.dta, ..., zeta_pc2.dta.
//
// For each prefix length k = 7, ..., 2:
//   1. reload the pc8plus-level data,
//   2. truncate every 8-character pc8plus code to its first k characters
//      (codes of any other length keep their full code as the group key),
//   3. average zeta_pc8plus within each resulting key,
//   4. keep one row per key, drop rows that stem from non-8-character codes,
//   5. save the two variables pc`k' and zeta_pc`k'.
forvalues k = 7(-1)2 {
    use data\firm_size_distribution\zeta_pc8plus.dta, clear

    gen ll = length(pc8plus)
    gen pc`k' = pc8plus
    replace pc`k' = substr(pc8plus, 1, `k') if ll == 8

    bysort pc`k': egen zeta_pc`k' = mean(zeta_pc8plus)

    // Order rows by code length inside each group before keeping the first
    // one. A plain -sort- leaves the within-group order arbitrary, so when a
    // code shorter than 8 characters coincides with a prefix, whether that
    // prefix survived the -drop- below used to vary between runs. Sorting on
    // ll puts the shorter code first, so such a prefix is always dropped and
    // never duplicates a code that already has its own zeta (the combined
    // table is later used in -merge m:1 pc-, which requires unique pc).
    bysort pc`k' (ll): keep if _n == 1
    drop if ll != 8

    keep pc`k' zeta_pc`k'
    save data\firm_size_distribution\zeta_pc`k'.dta, replace
}


// Stack the pc8plus-level zeta values and every prefix-level file
// (pc7 down to pc2) into one lookup table with variables pc and zeta_pc,
// saved as zeta_pc.dta.
use data\firm_size_distribution\zeta_pc8plus.dta, clear
rename pc8plus pc
rename zeta_pc8plus zeta_pc

forvalues k = 7(-1)2 {
    // Temporarily adopt the level-k variable names so the level-k file
    // appends into the same two columns, then switch back to the
    // generic names.
    rename pc pc`k'
    rename zeta_pc zeta_pc`k'
    append using data\firm_size_distribution\zeta_pc`k'.dta
    rename pc`k' pc
    rename zeta_pc`k' zeta_pc
}

save data\firm_size_distribution\zeta_pc.dta, replace


// Compute one zeta per product_id and save it as zeta_product_id.dta.
//
// Each row of product_id_all.dta carries a main code (pc) and a sub code
// (pc8plus). Both are looked up in zeta_pc.dta. For product_id values up to
// the threshold below, the product's zeta is the main-code zeta. Above the
// threshold it is the average of the main-code zeta and all sub-code zetas
// of that product.

// product_id values above this threshold get the averaged zeta.
local id_threshold = 3817
// Fallback used when a code has no zeta in the lookup table.
// NOTE(review): the origin of 2.78 is not visible in this file - confirm.
local zeta_fallback = 2.78

use estimation\2_product_list\output\product_id_all.dta, clear

// Look up zeta for the main code; keep only rows from the product list.
merge m:1 pc using data\firm_size_distribution\zeta_pc.dta, keep(master match) nogenerate
replace zeta_pc = `zeta_fallback' if missing(zeta_pc)

// Park the main-code results and look up zeta for the sub code.
rename zeta_pc zeta_pc_main
rename pc pc_main
rename pc8plus pc
merge m:1 pc using data\firm_size_distribution\zeta_pc.dta, keep(master match) nogenerate

// Apply the fallback to sub codes of averaged products. The condition is on
// product_id rather than on the row number (_n): the row number only matches
// the product_id threshold if the first 3817 sorted rows are exactly the
// non-averaged products. A sub code left missing would be counted as 0 by
// the total below and pull the average down.
replace zeta_pc = `zeta_fallback' if missing(zeta_pc) & product_id > `id_threshold'

// Count and total the sub codes per product, then keep one row per product.
// Ordering by pc within product_id makes the retained row deterministic.
bysort product_id (pc): gen nr_pc_sub = _N
by product_id: egen zeta_pc_sub = total(zeta_pc)
by product_id: keep if _n == 1

gen zeta_product_id = zeta_pc_main
replace zeta_product_id = (zeta_pc_main + zeta_pc_sub) / (nr_pc_sub + 1) if product_id > `id_threshold'

keep product_id zeta_product_id
save data\firm_size_distribution\zeta_product_id.dta, replace

// NOTE: disabled leftover loop header, never executed: forval j = 1/3817 {

// Delete the intermediate prefix-level files; only zeta_pc.dta and
// zeta_product_id.dta are kept.
forvalues k = 2/7 {
    erase data\firm_size_distribution\zeta_pc`k'.dta
}

